{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    mpg  cylinders  displacement horsepower  weight  acceleration  year  \\\n",
      "0  18.0          8         307.0      130.0  3504.0          12.0    70   \n",
      "1  15.0          8         350.0      165.0  3693.0          11.5    70   \n",
      "2  18.0          8         318.0      150.0  3436.0          11.0    70   \n",
      "3  16.0          8         304.0      150.0  3433.0          12.0    70   \n",
      "4  17.0          8         302.0      140.0  3449.0          10.5    70   \n",
      "\n",
      "   origin                   car name  \n",
      "0       1  chevrolet chevelle malibu  \n",
      "1       1          buick skylark 320  \n",
      "2       1         plymouth satellite  \n",
      "3       1              amc rebel sst  \n",
      "4       1                ford torino  \n",
      "      mpg  cylinders  displacement horsepower  weight  acceleration  year  \\\n",
      "393  27.0          4         140.0      86.00  2790.0          15.6    82   \n",
      "394  44.0          4          97.0      52.00  2130.0          24.6    82   \n",
      "395  32.0          4         135.0      84.00  2295.0          11.6    82   \n",
      "396  28.0          4         120.0      79.00  2625.0          18.6    82   \n",
      "397  31.0          4         119.0      82.00  2720.0          19.4    82   \n",
      "\n",
      "     origin         car name  \n",
      "393       1  ford mustang gl  \n",
      "394       2        vw pickup  \n",
      "395       1    dodge rampage  \n",
      "396       1      ford ranger  \n",
      "397       1       chevy s-10  \n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "columns = [\"mpg\", \"cylinders\", \"displacement\", \"horsepower\", \"weight\", \"acceleration\", \"year\", \"origin\", \"car name\"]\n",
    "cars = pd.read_table(\"auto-mpg.data\", delim_whitespace=True, names=columns)\n",
    "print(cars.head(5))\n",
    "print(cars.tail(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    mpg  cylinders  displacement horsepower  weight  acceleration  year  \\\n",
      "0  18.0          8         307.0      130.0  3504.0          12.0    70   \n",
      "1  15.0          8         350.0      165.0  3693.0          11.5    70   \n",
      "2  18.0          8         318.0      150.0  3436.0          11.0    70   \n",
      "3  16.0          8         304.0      150.0  3433.0          12.0    70   \n",
      "4  17.0          8         302.0      140.0  3449.0          10.5    70   \n",
      "\n",
      "   origin                   car name  cyl_3  cyl_4  cyl_5  cyl_6  cyl_8  \n",
      "0       1  chevrolet chevelle malibu      0      0      0      0      1  \n",
      "1       1          buick skylark 320      0      0      0      0      1  \n",
      "2       1         plymouth satellite      0      0      0      0      1  \n",
      "3       1              amc rebel sst      0      0      0      0      1  \n",
      "4       1                ford torino      0      0      0      0      1  \n",
      "    mpg  displacement horsepower  weight  acceleration  origin  \\\n",
      "0  18.0         307.0      130.0  3504.0          12.0       1   \n",
      "1  15.0         350.0      165.0  3693.0          11.5       1   \n",
      "2  18.0         318.0      150.0  3436.0          11.0       1   \n",
      "3  16.0         304.0      150.0  3433.0          12.0       1   \n",
      "4  17.0         302.0      140.0  3449.0          10.5       1   \n",
      "\n",
      "                    car name  cyl_3  cyl_4  cyl_5   ...     year_73  year_74  \\\n",
      "0  chevrolet chevelle malibu      0      0      0   ...           0        0   \n",
      "1          buick skylark 320      0      0      0   ...           0        0   \n",
      "2         plymouth satellite      0      0      0   ...           0        0   \n",
      "3              amc rebel sst      0      0      0   ...           0        0   \n",
      "4                ford torino      0      0      0   ...           0        0   \n",
      "\n",
      "   year_75  year_76  year_77  year_78  year_79  year_80  year_81  year_82  \n",
      "0        0        0        0        0        0        0        0        0  \n",
      "1        0        0        0        0        0        0        0        0  \n",
      "2        0        0        0        0        0        0        0        0  \n",
      "3        0        0        0        0        0        0        0        0  \n",
      "4        0        0        0        0        0        0        0        0  \n",
      "\n",
      "[5 rows x 25 columns]\n"
     ]
    }
   ],
   "source": [
    "dummy_cylinders = pd.get_dummies(cars[\"cylinders\"], prefix=\"cyl\")\n",
    "#print dummy_cylinders\n",
    "cars = pd.concat([cars, dummy_cylinders], axis=1)\n",
    "print(cars.head())\n",
    "dummy_years = pd.get_dummies(cars[\"year\"], prefix=\"year\")\n",
    "#print dummy_years\n",
    "cars = pd.concat([cars, dummy_years], axis=1)\n",
    "cars = cars.drop(\"year\", axis=1)\n",
    "cars = cars.drop(\"cylinders\", axis=1)\n",
    "print(cars.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "shuffled_rows = np.random.permutation(cars.index)\n",
    "shuffled_cars = cars.iloc[shuffled_rows]\n",
    "highest_train_row = int(cars.shape[0] * .70)\n",
    "train = shuffled_cars.iloc[0:highest_train_row]\n",
    "test = shuffled_cars.iloc[highest_train_row:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "unique_origins = cars[\"origin\"].unique()\n",
    "unique_origins.sort()\n",
    "\n",
    "models = {}\n",
    "features = [c for c in train.columns if c.startswith(\"cyl\") or c.startswith(\"year\")]\n",
    "\n",
    "for origin in unique_origins:\n",
    "    model = LogisticRegression()\n",
    "    \n",
    "    X_train = train[features]\n",
    "    y_train = train[\"origin\"] == origin\n",
    "\n",
    "    model.fit(X_train, y_train)\n",
    "    models[origin] = model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Empty DataFrame\n",
      "Columns: [1, 2, 3]\n",
      "Index: []\n",
      "            1         2         3\n",
      "0    0.449962  0.278233  0.257692\n",
      "1    0.266463  0.420440  0.319938\n",
      "2    0.306623  0.391842  0.299603\n",
      "3    0.253376  0.362190  0.388915\n",
      "4    0.360157  0.420710  0.216831\n",
      "5    0.253376  0.362190  0.388915\n",
      "6    0.266463  0.420440  0.319938\n",
      "7    0.792845  0.094610  0.103560\n",
      "8    0.983246  0.016008  0.020850\n",
      "9    0.817616  0.101673  0.076423\n",
      "10   0.258733  0.299579  0.440839\n",
      "11   0.360157  0.420710  0.216831\n",
      "12   0.958799  0.037250  0.022790\n",
      "13   0.517603  0.248328  0.235285\n",
      "14   0.253376  0.362190  0.388915\n",
      "15   0.958799  0.037250  0.022790\n",
      "16   0.968540  0.034528  0.018801\n",
      "17   0.953930  0.032954  0.028759\n",
      "18   0.896040  0.052606  0.078549\n",
      "19   0.967178  0.018923  0.036452\n",
      "20   0.983246  0.016008  0.020850\n",
      "21   0.517603  0.248328  0.235285\n",
      "22   0.274615  0.408990  0.299642\n",
      "23   0.360157  0.420710  0.216831\n",
      "24   0.967178  0.018923  0.036452\n",
      "25   0.983246  0.016008  0.020850\n",
      "26   0.960296  0.030752  0.028753\n",
      "27   0.517603  0.248328  0.235285\n",
      "28   0.596835  0.129105  0.290712\n",
      "29   0.350129  0.281452  0.353431\n",
      "..        ...       ...       ...\n",
      "90   0.958799  0.037250  0.022790\n",
      "91   0.985165  0.018675  0.014716\n",
      "92   0.596835  0.129105  0.290712\n",
      "93   0.596835  0.129105  0.290712\n",
      "94   0.855712  0.094705  0.063658\n",
      "95   0.817616  0.101673  0.076423\n",
      "96   0.985165  0.018675  0.014716\n",
      "97   0.968540  0.034528  0.018801\n",
      "98   0.306623  0.391842  0.299603\n",
      "99   0.350129  0.281452  0.353431\n",
      "100  0.298484  0.440006  0.252043\n",
      "101  0.879231  0.037415  0.127052\n",
      "102  0.939749  0.020907  0.091442\n",
      "103  0.266463  0.420440  0.319938\n",
      "104  0.369904  0.372282  0.227556\n",
      "105  0.952080  0.034491  0.031533\n",
      "106  0.967178  0.018923  0.036452\n",
      "107  0.350129  0.281452  0.353431\n",
      "108  0.596835  0.129105  0.290712\n",
      "109  0.958799  0.037250  0.022790\n",
      "110  0.918732  0.045426  0.070245\n",
      "111  0.896040  0.052606  0.078549\n",
      "112  0.364068  0.104373  0.574127\n",
      "113  0.974222  0.013113  0.039404\n",
      "114  0.596835  0.129105  0.290712\n",
      "115  0.967178  0.018923  0.036452\n",
      "116  0.958799  0.037250  0.022790\n",
      "117  0.408629  0.212501  0.372139\n",
      "118  0.449962  0.278233  0.257692\n",
      "119  0.896040  0.052606  0.078549\n",
      "\n",
      "[120 rows x 3 columns]\n"
     ]
    }
   ],
   "source": [
    "testing_probs = pd.DataFrame(columns=unique_origins)  \n",
    "print testing_probs\n",
    "\n",
    "for origin in unique_origins:\n",
    "    # Select testing features.\n",
    "    X_test = test[features]   \n",
    "    # Compute probability of observation being in the origin.\n",
    "    testing_probs[origin] = models[origin].predict_proba(X_test)[:,1]\n",
    "print testing_probs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
